# This script recreates the statistical analysis present in the paper, and for Fig. 4.
# Descriptive plots (Figure 1-3) can be recreated using the script "Script for Plots.R".
# Required packages: tidyverse, lmtest, sandwich, car; stargazer for table recreation
# Required data files: data_panelled.RData
# Contact for any questions or clarifications: Dino Wildi, to be reached at dino.wildi@ucdconnect.ie

library(tidyverse)
library(stargazer)
library(sandwich)
library(lmtest)
library(car)

load("data_panelled.RData")

#### MAIN PAPER ANALYSIS (TABLE 1) ####

varnames <- c("MSP voter availability to RSP",
              "RRP voter availability to RSP",
              "RRP vote share (t-1)",
              "LDV",
              "MSP vote share (t-1)",
              "RRP position",
              "Immigrants since last election")

#Model Specifications (see paper text for explanation of variables)
model.1.spec <- "multicult ~ lag_chall + ldv_abs"
model.2.spec <- "multicult ~ wagner_loss + wagner_winback + ldv_abs"
model.3.spec <- "multicult ~ wagner_loss + wagner_winback + lag_chall + ldv_abs"
model.4.spec <- "multicult ~ wagner_loss + wagner_winback + lag_chall + challpos + ldv_abs"
model.5.spec <- "multicult ~ wagner_loss + wagner_winback + lag_chall + lag_size + challpos + delta_immig + ldv_abs"

#Model calculations
model.1 <- lm(model.1.spec, ds)
model.2 <- lm(model.2.spec, ds)
model.3 <- lm(model.3.spec, ds)
model.4 <- lm(model.4.spec, ds)
model.5 <- lm(model.5.spec, ds)

#prepare standard errors
m1_ses <- sqrt(diag(vcovCL(model.1, cluster = ~ party)))
m2_ses <- sqrt(diag(vcovCL(model.2, cluster = ~ party)))
m3_ses <- sqrt(diag(vcovCL(model.3, cluster = ~ party)))
m4_ses <- sqrt(diag(vcovCL(model.4, cluster = ~ party)))
m5_ses <- sqrt(diag(vcovCL(model.5, cluster = ~ party)))

#Table 1 with clustered SEs
stargazer(model.1, model.2, model.3, model.4, model.5,
          type = "html", out = "Tables/Position_main.html",
          order = c("wagner_loss", "wagner_winback", "lag_chall", "ldv_abs"),
          se = list(m1_ses, m2_ses, m3_ses, m4_ses, m5_ses),
          omit.stat = "f",
          covariate.labels = varnames)

#### MAIN PAPER ANALYSIS (TABLE 2) ####

model.6.spec <- "salience ~ lag_chall + ldv_salience"
model.7.spec <- "salience ~ wagner_loss + wagner_winback + ldv_salience"
model.8.spec <- "salience ~ wagner_loss + wagner_winback + lag_chall + ldv_salience"
model.9.spec <- "salience ~ wagner_loss + wagner_winback + lag_chall + challpos + ldv_salience"
model.0.spec <- "salience ~ wagner_loss + wagner_winback + lag_chall + lag_size + challpos + delta_immig + ldv_salience"

#Model calculations
model.6 <- lm(model.6.spec, ds)
model.7 <- lm(model.7.spec, ds)
model.8 <- lm(model.8.spec, ds)
model.9 <- lm(model.9.spec, ds)
model.0 <- lm(model.0.spec, ds)

#prepare standard errors
m6_ses <- sqrt(diag(vcovCL(model.6, cluster = ~ party)))
m7_ses <- sqrt(diag(vcovCL(model.7, cluster = ~ party)))
m8_ses <- sqrt(diag(vcovCL(model.8, cluster = ~ party)))
m9_ses <- sqrt(diag(vcovCL(model.9, cluster = ~ party)))
m0_ses <- sqrt(diag(vcovCL(model.0, cluster = ~ party)))

#Table 1 with clustered SEs
stargazer(model.6, model.7, model.8, model.9, model.0,
          type = "html", out = "Tables/Salience_main.html",
          order = c("wagner_loss", "wagner_winback", "lag_chall", "ldv_salience"),
          se = list(m6_ses, m7_ses, m8_ses, m9_ses, m0_ses),
          omit.stat = "f",
          covariate.labels = varnames)

#### FIGURE 4: VISUALISING SCENARIOS ####

##   1) Stable vote share, threat varies 
##   2) Stable threat level, vote share varies 
##   3) Vote share and threat level vary in tandem
##   4) Stable threat level, vote share rises from zero

# Create dataset to predict from:

scenarios <- tibble(scenario = c(0.9,1,1.1,1.9,2,2.1,2.9,3,3.1,3.9,4,4.1),
                    lag_chall = c(mean(ds$challshare),mean(ds$challshare),mean(ds$challshare),
                                  mean(ds$challshare),mean(ds$challshare)+sd(ds$challshare),mean(ds$challshare)-sd(ds$challshare),
                                  mean(ds$challshare),mean(ds$challshare)+sd(ds$challshare),mean(ds$challshare)-sd(ds$challshare),
                                  0,8.73,14.35),
                    wagner_loss = c(mean(ds$wagner_loss), mean(ds$wagner_loss)+sd(ds$wagner_loss), mean(ds$wagner_loss)-sd(ds$wagner_loss),
                                    mean(ds$wagner_loss), mean(ds$wagner_loss), mean(ds$wagner_loss),
                                    mean(ds$wagner_loss), mean(ds$wagner_loss)+sd(ds$wagner_loss), mean(ds$wagner_loss)-sd(ds$wagner_loss),
                                    0.2572,0.2572,0.2572),
                    ldv_abs = mean(ds$ldv_abs),
                    wagner_winback = mean(ds$wagner_winback),
                    level = c("Medium", "High", "Low","Medium", "High", "Low",
                              "Medium", "High", "Low","Low","Medium","High"))

# Create predictions

scenarios$fit <- predict(model.3, newdata = scenarios)
scenarios.predicted <- data.frame(predict(model.3, 
                                          newdata = scenarios, 
                                          interval = "confidence"))
scenarios <- unique(left_join(scenarios,scenarios.predicted))

# Create Figure 4

fig4 <- ggplot(scenarios, aes(y = scenario, x = fit)) + 
  geom_point(aes(shape = level), size = 3, fill = "black") +
  geom_errorbarh(aes(xmin = lwr, xmax = upr, y = scenario, linetype = level),
                 height = 0.1) +
  scale_y_continuous(breaks = c(1,2,3,4), trans = "reverse",
                     labels = c("1) Availability", "2) AIP share", "3) Both", "4) New party")) +
  scale_shape_manual(name = "Level of varying variable(s)",
                     values = c(17,25,23)) +
  scale_linetype_discrete(name = "Level of varying variable(s)") +
  labs(x = "Predicted MSP position on immigration",
       y = "Changing variable") +
  theme_minimal() +
  theme(legend.position = "bottom",
        legend.text = element_text(size = 14),
        axis.title = element_text(size = 16),
        axis.text = element_text(size = 14))
fig4

#### ROBUSTNESS TESTS ####

#### ROBUSTNESS TESTS (see appendix D) ####

# Models with alternative scaling
lowe.low <- lm(multicult_low ~ wagner_loss + wagner_winback + lag_chall + ldv_low, data = ds)
lowe.high <- lm(multicult_high ~ wagner_loss + wagner_winback + lag_chall + ldv_high, data = ds)
kf <- lm(multicult_kf ~ wagner_loss + wagner_winback + lag_chall + ldv_kf, data = ds)
kf.nona <- lm(multicult_kf_nona ~ wagner_loss + wagner_winback + lag_chall + ldv_kf_nona, data = ds)

#prepare standard errors
lowe.low_ses <- sqrt(diag(vcovCL(lowe.low, cluster = ~ party)))
lowe.high_ses <- sqrt(diag(vcovCL(lowe.high, cluster = ~ party)))
kf_ses <- sqrt(diag(vcovCL(kf, cluster = ~ party)))
kf.nona_ses <- sqrt(diag(vcovCL(kf.nona, cluster = ~ party)))

stargazer(model.3, lowe.low, lowe.high, kf, kf.nona,
          type = "html", out = "Tables/Robustness_Scaling.html",
          order = c("wagner_loss", "wagner_winback", "lag_chall", "ldv_abs"),
          se = list(m3_ses, lowe.low_ses, lowe.high_ses, kf_ses, kf.nona_ses),
          omit.stat = "f",
          covariate.labels = c("MSP voter availability to RSP",
                               "RRP voter availability to RSP",
                               "RRP vote share (t-1)",
                               "LDV"))

# Models with alternative specification of the dependent variable
model.meguid <- lm(prr ~ wagner_loss + wagner_winback + lag_chall + lag_prr,
                  data = ds)
narrow <- lm(multicult.narrow ~ wagner_loss + wagner_winback + lag_chall + lag_narrow,
             data = ds)

meguid_ses <- sqrt(diag(vcovCL(model.meguid, cluster = ~ party)))
narrow_ses <- sqrt(diag(vcovCL(narrow, cluster = ~ party)))

stargazer(model.3, model.meguid, narrow,
          type = "html", out = "Tables/Robustness_Items.html",
          order = c("wagner_loss", "wagner_winback", "lag_chall", "ldv_abs"),
          se = list(m3_ses, meguid_ses, narrow_ses),
          omit.stat = c("ser", "f"),
          covariate.labels = c("MSP voter availability to RSP",
                               "RRP voter availability to RSP",
                               "RRP vote share (t-1)",
                               "LDV"))

# One-sided threat models to account for multicollinearity

model.5.threat <- lm(multicult ~ wagner_loss + lag_chall + lag_size + challpos + delta_immig + ldv_abs, ds)
m5.threat_ses <- sqrt(diag(vcovCL(model.5.threat, cluster = ~ party)))

model.5.potential <- lm(multicult ~ wagner_winback + lag_chall + lag_size + challpos + delta_immig + ldv_abs, ds)
m5.potential_ses <- sqrt(diag(vcovCL(model.5.potential, cluster = ~ party)))


stargazer(model.5, model.5.threat, model.5.potential,
          type = "html", out = "Tables/Robustness_Onesided.html",
          order = c("wagner_loss", "wagner_winback", "lag_chall", "ldv_abs"),
          se = list(m5_ses, m5.threat_ses, m5.potential_ses),
          omit.stat = c("ser", "f"),
          covariate.labels = varnames)

#VIF test
vifmodel <- vif(model.5)
vifmodel <- as.data.frame(vifmodel)
vifmodel$variable <- rownames(vifmodel)

### Models without LDV and Fixed Effects

no_ldv <- lm(multicult ~ wagner_loss + wagner_winback + lag_chall + lag_size + challpos + delta_immig, ds)
summary(no_ldv)

noldv_ses <- sqrt(diag(vcovCL(no_ldv, cluster = ~ party)))


stargazer(model.5, no_ldv,
          type = "html", out = "Tables/Robustness_Omit_LDV.html",
          order = c("wagner_loss", "wagner_winback", "lag_chall", "ldv_abs"),
          se = list(m5_ses, noldv_ses),
          omit.stat = c("ser", "f"),
          covariate.labels = varnames)


fe_model <- fixest::feols(multicult ~ wagner_loss + wagner_winback + lag_chall + ldv_abs | party,
                          data = ds)
fe_model_fullcont <- fixest::feols(
  multicult ~ wagner_loss + wagner_winback + lag_chall + lag_size + challpos + delta_immig + ldv_abs | party,
  data = ds)

fe_model
fe_model_fullcont

### Interaction models
inter <- lm(multicult ~ wagner_loss + wagner_winback + lag_chall + wagner_winback*lag_chall + ldv_abs,
                    ds)
inter.controls <- lm(multicult ~ wagner_loss + wagner_winback + lag_chall + 
                               wagner_winback*lag_chall + ldv_abs + lag_size + challpos + delta_immig,
                    ds)

inter.robust <- coeftest(inter, vcov. = vcovCL(inter, cluster = ~party))
inter.controls.robust <- coeftest(inter.controls, vcov. = vcovCL(inter.controls, cluster = ~party))

stargazer(m3_robust, inter.robust, m5_robust, inter.controls.robust,
          type = "html", out = "interactions.html")

# Marginal effect plots
ip1 <- interplot::interplot(inter, 
                     var1 = "wagner_winback",
                     var2 = "lag_chall") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(title = "Marginal effect of availability on position",
       x = "RRP vote share at past election", y = "Position on immigration") +
  scale_y_continuous(breaks = seq(-7.5, 7.5, 2.5),
                     limits = c(-7.5, 7.5))

ip2 <- interplot::interplot(inter.controls, 
                     var1 = "wagner_winback",
                     var2 = "lag_chall") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(title = "Marginal effect of availability on position (full controls)",
       x = "RRP vote share at past election", y = "Position on immigration") +
  scale_y_continuous(breaks = seq(-7.5, 7.5, 2.5),
                     limits = c(-7.5, 7.5))

gridExtra::grid.arrange(ip1, ip2, ncol = 2)

#Linearity assumption
interflex::interflex(estimator = "binning", data = ds,
          Y = "multicult", X = "lag_chall", D = "wagner_winback")

### Time sensitivity: only cases within 2.5 years of EP election

timeclose <- filter(ds, date %in% c(199906:200300,
                                    200406:200700,
                                    200906:201200,
                                    201406:201700,
                                    201906:202200))

m3.close <- lm(model.3.spec, timeclose)
m3.close_robust <- coeftest(m3.close, vcov. = vcovCL(m3.close, cluster = ~party))                    
m5.close <- lm(model.5.spec, timeclose)
m5.close_robust <- coeftest(m5.close, vcov. = vcovCL(m5.close, cluster = ~party))

stargazer(m3_robust, m3.close_robust, m5_robust, m5.close_robust,
          type = "html", out = "Tables/RnR_Time.html",
          order = c("wagner_loss", "wagner_winback", "lag_chall", "ldv_abs"),
          covariate.labels = varnames)
